# Script Final for PH2 indicator calculation

# You need to start with the file containing your monthly average data; this file has to be in CSV format.
# It does not matter if months are missing (the time series is regularized
# in this script), but the first year HAS to start with a value for January, and the last year
# HAS to have a value for December. Also, years where more than 4 months are missing should be removed (the whole year has to be removed then).

# A CSV file is provided as an example; your data should be laid out exactly this way (and the column names must be EXACTLY the same for the script to run without problems).

#Here we can start!
#1 Set the directory where your input file is stored.
# Replace "." (the current working directory) with the folder that holds your
# CSV file, for example "C:\\Users\\me\\PH2" or "C:/Users/me/PH2".
# The path must be a complete, quoted string: a missing closing quote makes
# the whole script unreadable for R.
data_dir <- "."
setwd(data_dir)

# Load packages (install them first if they are not yet installed in your R)
library(boot)
library(pastecs) 

#2. Read your file
# The input is a semicolon-separated text file with a header row. The columns
# DATE and TOTCOP are used further down. The value -9 is read as NA.
# The argument name is spelled out in full (na.strings) instead of relying on
# partial argument matching.
PH2Z1 <- read.table("TOTCOP-monthlymean-BOB.csv", sep = ";", header = TRUE,
                    na.strings = "-9")
                 
# 3. Start of the analysis

# Extract the date column and the abundance column from the input table
NSZ1time <- PH2Z1$DATE    # dates, as text in day/month/year form
NSZ1data <- PH2Z1$TOTCOP  # abundance data
data <- as.matrix(NSZ1data)
time <- as.matrix(NSZ1time)

# Convert the dates from text to R date-time format.
# tz = "UTC" keeps the result independent of the time zone and daylight-saving
# rules of the computer running the script, so the day numbers computed from
# these dates later on are the same on every machine.
time <- strptime(time, "%d/%m/%Y", tz = "UTC")
if (any(is.na(time))) {
  # A date that does not match day/month/year becomes NA and would corrupt
  # the regularization step, so stop here with an explicit message.
  stop("Some values in the DATE column could not be read as day/month/year ",
       "(for example 15/01/1958). Fix them before continuing.",
       call. = FALSE)
}

# log10(x + 1) transform of the abundance data
data <- log10(data + 1)

# Regularize the irregular series into a regular monthly series
# (frequency = 12) without missing months.
# NOTE: the method requested from regul() is "area", not linear interpolation.
x <- as.numeric(julian(time))  # dates as day numbers

# First pass: n is deliberately larger than the length of the time series
# (in this example the length is 604, so 700 is used).
newdata <- regul(x, data, units = "daystoyears", frequency = 12,
                 methods = "area", tol = 3, n = 700)

# Print the result explicitly so that it is also shown when the script is run
# with source(), where a bare object name prints nothing.
# The printout gives the number of interpolated values; in this example it
# reads "56 interpolated values on 604 ( 0 NAs padded at ends )".
print(newdata)

# Second pass: n has to be adjusted to the length of the time series.
# n = length of your data (here 604) + number of interpolated values (here 56)
#   = 660 in this example. Change 660 to the value obtained for your data.
newdata <- regul(x, data, units = "daystoyears", frequency = 12,
                 methods = "area", tol = 3, n = 660)

# Create a time series object from the regul() output.
# rdata is the regularized time series used by the rest of the script.
rdata <- tseries(newdata)

# Show the regularized series so it can be checked visually. print() is used
# so the series is also displayed when the script is run with source().
print(rdata)

# The rest of the analysis must not be run on a series with holes: a single
# NA would propagate into the monthly means and all anomalies.
if (anyNA(rdata)) {
  stop("The regularized time series contains missing values. ",
       "Fix the input data or the regul() settings before continuing.",
       call. = FALSE)
}

# Start of the decomposition
# The time series is decomposed with the seasonal difference method.
# 1. Compute the mean annual cycle: for each of the 12 positions in the
#    year, average every 12th value of the regularized series.
ab <- as.matrix(rdata)
mmean <- vapply(
  seq_len(12),
  function(month) {
    rows_of_month <- seq(from = month, to = nrow(ab), by = 12)
    mean(ab[rows_of_month, ])
  },
  numeric(1)
)

# 2. Remove the seasonal cycle
nt <- nrow(ab)           # number of months in the regularized series
nyear <- floor(nt / 12)  # number of complete years

# Repeat the mean annual cycle so that it covers every month of the series.
# length.out = nt also covers a trailing incomplete year; repeating the cycle
# nyear times and then indexing 1:nt would pad those months with NA.
repcycle <- rep(mmean, length.out = nt)

ano <- ab - repcycle     # monthly anomalies
ano <- as.ts(ano)

# Save the monthly anomalies
write.table(ano, file = "PH2ZOO-anomonthlyBOB.txt")

# 3. Compute the mean annual anomalies: the average of the 12 monthly
#    anomalies of each complete year.
# seq_len(nyear) is empty when there is no complete year, whereas
# seq(1, nyear) would then run over c(1, 0) and index outside the data.
anoyear <- vapply(
  seq_len(nyear),
  function(yr) {
    months_of_year <- (12 * (yr - 1) + 1):(12 * yr)
    mean(ano[months_of_year])
  },
  numeric(1)
)

# Convert the series to time series objects.
# The first year is taken from the earliest date in the data (time holds the
# parsed dates), so it no longer has to be edited by hand. The series is
# expected to start in January, hence month 1.
first_year <- as.integer(format(min(time, na.rm = TRUE), "%Y"))
repcycle <- ts(repcycle, start = c(first_year, 1), frequency = 12)
ano <- ts(ano, start = c(first_year, 1), frequency = 12)

# Remove the singleton dimension of ano
ano <- drop(ano)

# Annual series: with frequency = 1 the start is simply the first year
anoyear <- ts(anoyear, start = first_year, frequency = 1)

# Save the annual anomalies
write.table(anoyear, file = "PH2ZOO-anoyearBOB.txt")

# Now, let's produce the graphs related to this analysis!
# Figure 1: time series decomposition, drawn as three stacked panels in a
# new graphics window.
x11()
par(mfrow = c(3, 1))

# Panel 1: regularized series (the figure title is attached to this panel)
plot(rdata, col = 4)
title("Time series decomposition Zoo for BOB area")

# Panel 2: repeated mean annual cycle
plot(repcycle, type = "l", col = 4)

# Panel 3: monthly anomalies
plot(ano, col = 4)
# Save the graph produced as a picture or PDF in the right folder!

# Figure 2: annual anomalies, one vertical bar per year.
# x11() is used (as for the decomposition figure) because windows() only
# exists on Windows.
x11()

# Years covered by the annual series, read from the series itself.
# stats::time() is called explicitly because this script also has a variable
# named time.
year <- as.numeric(stats::time(anoyear))

plot(anoyear, type = "h", xaxt = "n", col = 4)
# las must be one of 0, 1, 2, 3; 1 draws the tick labels horizontally
axis(1, at = year, las = 1)
title("Zoo Yearly anomalies for the Bay of Biscay")
# Save the graph produced as a picture or PDF in the right folder (with a name in the following format: ANO-ANUAL-PHYTO-L4)

# Here, you need to convert your file "PH2ZOO-anoyearBOB.txt" into a CSV file, renaming the columns as in the example "PH2ZOO-anoyearBOB.csv"
# OK, now we have the final results, but we want to present them nicely!
# Following the concept of the boxplot, we classify the results by percentiles into 3 categories of anomalies

# Clear the workspace so that nothing from the first part of the script is
# reused by accident. WARNING: this deletes every object in your R session.
rm(list = ls())

# Set your directory and read your data.
# Replace "." (the current working directory) with the folder that holds
# "PH2ZOO-anoyearBOB.csv". The path must be a complete, quoted string.
data_dir <- "."
setwd(data_dir)

# Semicolon-separated file with a header row; the first column is used as the
# x value and the second column as the anomaly in the rest of the script.
Anom <- read.table("PH2ZOO-anoyearBOB.csv", sep = ";", header = TRUE)

# Median and zone limits, all taken from the quantiles of the anomalies
# (second column of Anom). They are computed in a single quantile() call and
# then picked out by position.
anom_quantiles <- quantile(Anom[, 2], c(0, 0.025, 0.25, 0.5, 0.75, 0.975, 1))

lim_inf    <- anom_quantiles[1]  # minimum (0 %)
LCI        <- anom_quantiles[2]  # lowest inferior limit (2.5 %)
LSI        <- anom_quantiles[3]  # 1st inferior limit (25 %)
valeur_med <- anom_quantiles[4]  # median (50 %)
LSS        <- anom_quantiles[5]  # 1st superior limit (75 %)
LCS        <- anom_quantiles[6]  # uppermost superior limit (97.5 %)
lim_sup    <- anom_quantiles[7]  # maximum (100 %)

# Make the final result graph

# Range of the x axis, taken from the first column of the data.
# Set it by hand, e.g. xlim_years <- c(2001, 2013), to show only part of the
# series.
xlim_years <- range(Anom[, 1], na.rm = TRUE)

# Adapt the numbers in "lab" to get suitable tick labels on your x axis.
plot(Anom[, 1], Anom[, 2], ylim = c(lim_inf, lim_sup), xlim = xlim_years,
     las = 2, xlab = "", lab = c(16, 7, 12), pch = 4, cex = 1.2, lwd = 2,
     axes = FALSE, ylab = "indice")
# Only the x axis is drawn. axis() takes no xlim or xlab argument; the axis
# range comes from the plot() call above.
axis(1)

# Draw the coloured zones. The polygons extend far beyond the data on both
# sides so that they always fill the whole width of the plot.
zone_x <- c(min(Anom[, 1]) - 20000, max(Anom[, 1]) + 20000,
            max(Anom[, 1]) + 20000, min(Anom[, 1]) - 20000)
polygon(x = zone_x, y = c(lim_inf, lim_inf, lim_sup, lim_sup),
        col = "#F5A9A9CC", border = NA)  # minimum to maximum
polygon(x = zone_x, y = c(LCI, LCI, LCS, LCS),
        col = "#F7D358CC", border = NA)  # 2.5 % to 97.5 %
polygon(x = zone_x, y = c(LSI, LSI, LSS, LSS),
        col = "#82FA58CC", border = NA)  # 25 % to 75 %

# Redraw the points on top of the polygons
points(Anom[, 1], Anom[, 2], pch = 4, cex = 1.2, lwd = 2)

# Add the lines which delimit the different zones
abline(h = valeur_med, col = "black")
abline(h = c(LSI, LSS), col = "black", lty = 3)
abline(h = c(LCI, LCS), col = "black", lty = 2)

col <- c("#F5A9A9CC", "#F7D358CC", "#82FA58CC")
# Move the legend to another corner if it hides your results
legend("topright", inset = 0, cex = 1,
       legend = c("extreme change", "important change", "small change"),
       fill = col, horiz = FALSE, box.lty = 0)
# Adapt the title to your data set!
title("PH2 zooplankton indice for the Bay of Biscay")

# Keep the graph produced as a jpeg or TIFF picture with a name in this format: PH2ZOO-BOB-ALL.jpeg



# Here we are! :) The script is over and you have your indicator :)
